library(data.table)
library(ggplot2)
library(lfe)
library(Hmisc)
library(stargazer)
library(scales) 


bd <- fread('../data/bankdata_conventional.csv')        # Bank capitalization data

d.in[,cr.temp := cr/100]
d.in$cr <- NULL
d.in[,cr := cr.temp]

d.in[, held_share := held_volume/total_volume]
d.in[,jumbo_share := jumbo_volume/total_volume]
d.in[,conf_held := conforming_held_volume/conforming_volume]

##table1: summary statistics for bank-level data
ss <- d.in[bank == 1,j=list(nBanks = length(unique(rssdid)),cr = weighted.mean(cr*100,w=total_volume,na.rm=T),held_share = weighted.mean(held_share*100,w=total_volume, na.rm=T),
                            jumbo_share = weighted.mean(jumbo_share*100,w=total_volume,na.rm=T),jumbo_held = weighted.mean(jumbo_held_volume/jumbo_volume*100,w=jumbo_volume,na.rm=T),conf_held = weighted.mean(conf_held,w=conforming_volume*100,na.rm=T)),by='year']

fwrite(ss,"table1.csv")


# Figure 1d: Capital ratios over time
capRatio <- d.in[,j=list(m=mean(cr,na.rm=T),wm = weighted.mean(cr,w=total_volume,na.rm=T)),by='year']
ggplot(capRatio) + geom_line(aes(x=year,y=m)) + geom_line(aes(x=year,y=wm),linetype = 'dashed') + theme_bw() + scale_y_continuous(labels = function(x) {paste0(x*100,'%')}) + scale_x_continuous(breaks = seq(2007,2017,by=2)) + xlab(NULL) + ylab(NULL) 
ggsave('figure_1_d.png',height=3,width=5,units='in')



##Figure 4: held on cr, within and across lenders
held.xs.reg <- lm(held_share ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=d.in,weights=d.in$total_volume)
cr.xs.reg   <- lm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=d.in,weights=d.in$total_volume)
d.in[,held.xs.resid := held_share - predict(held.xs.reg,d.in)]
d.in[,cr.xs.resid   := cr   - predict(cr.xs.reg,d.in)]
d.in[,cr.resid.xs.bin := ave(cr.xs.resid,cut2(cr.xs.resid,g=25),na.rm=T)]
temp.model <- lm(held.xs.resid ~ cr.xs.resid,data=d.in,weights=d.in$total_volume)
qq <- d.in[,j=list(m = weighted.mean(held.xs.resid,w=total_volume,na.rm=T),n = sum(total_volume,na.rm=T)),by=cr.resid.xs.bin]
qq[,cr.xs.resid := cr.resid.xs.bin]
qq[,held.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,held.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,held.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]
ggplot(qq) + geom_point(aes(x=cr.resid.xs.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('Held % (residual)') + 
  scale_x_continuous(labels = function(x) paste0(x, "%")) + scale_y_continuous(labels = percent)  + geom_line(aes(x=cr.resid.xs.bin,y=held.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.xs.bin,ymin=held.predict.l,ymax=held.predict.u),alpha=.25)  
ggsave('figure_4_a.png',height=3,width=5,units = 'in')

# Figure 4B -- 
# Banks with problematic data when backing out bank FE.
d.in[,n.years := .N,by='rssdid']
d.in.wi <- d.in[!(rssdid %in% c('5050028','1632','2938','4536084'))]
#d.in.wi <- d.in # [!(rssdid %in% c('706896', '714468','99-0057212')) ] # Not enough observations to identify...
held.wi.reg <- felm(held_share ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=d.in.wi,weights=d.in.wi$total_volume)
cr.wi.reg   <- felm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=d.in.wi,weights=d.in.wi$total_volume)

dt <- data.table(held.resid = held.wi.reg$residuals,cr.resid = cr.wi.reg$residuals,ww = held.wi.reg$weights)
temp.model <- lm(held.resid.held_share ~ cr.resid.cr,data=dt,weights=dt$ww)
dt[,cr.resid.bin := ave(cr.resid.cr,cut2(cr.resid.cr,g=25),na.rm=T) ]

qq <- dt[,j=list(m = weighted.mean(held.resid.held_share,w=ww,na.rm=T),n = sum(ww,na.rm=T)),by=cr.resid.bin]
qq[,cr.resid.cr := cr.resid.bin]
qq[,held.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,held.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,held.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]
ggplot(qq) + geom_point(aes(x=cr.resid.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('Held % (residual)') + 
  scale_x_continuous(labels = function(x) paste0(x, "%")) + scale_y_continuous(labels = percent) + geom_line(aes(x=cr.resid.bin,y=held.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.bin,ymin=held.predict.l,ymax=held.predict.u),alpha=.25)  
ggsave('figure_4_b.png',height=3,width=5,units = 'in')


# Figure 2A -- 
jumbo.xs.reg <- lm(jumbo_share ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=d.in,weights=d.in$total_originations)
cr.xs.reg   <- lm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding + core_deposits+ as.factor(year) ,data=d.in,weights=d.in$total_originations)
d.in[,jumbo.xs.resid := jumbo_share - predict(jumbo.xs.reg,d.in)]
d.in[,cr.xs.resid   := cr   - predict(cr.xs.reg,d.in)]
d.in[,cr.resid.xs.bin := ave(cr.xs.resid,cut2(cr.xs.resid,g=25),na.rm=T)]
temp.model <- lm(jumbo.xs.resid ~ cr.xs.resid,data=d.in,weights=d.in$total_originations)
qq <- d.in[,j=list(m = weighted.mean(jumbo.xs.resid,w=total_originations,na.rm=T),n = sum(total_volume,na.rm=T)),by=cr.resid.xs.bin]
qq[,cr.xs.resid := cr.resid.xs.bin]
qq[,jumbo.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,jumbo.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,jumbo.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]
ggplot(qq) + geom_point(aes(x=cr.resid.xs.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('Jumbo % (residual)') + 
  scale_x_continuous(labels = function(x) paste0(x, "%")) + scale_y_continuous(labels = percent)  + geom_line(aes(x=cr.resid.xs.bin,y=jumbo.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.xs.bin,ymin=jumbo.predict.l,ymax=jumbo.predict.u),alpha=.25)  
ggsave('figure_2_a.png',height=3,width=5,units = 'in')


# Figure 2B -- 
d.in[,n.years := .N,by='rssdid']
d.in.wi <- d.in[!(rssdid %in% c('5050028','1632','2938','4536084'))]
jumbo.wi.reg <- felm(jumbo_share ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=d.in.wi,weights=d.in.wi$total_volume)
cr.wi.reg   <- felm(cr   ~  log(total_assets) + log(income) + log(loan_amount) + I(total_deposits / total_assets) + noncore_funding+ core_deposits | as.factor(year) + as.factor(rssdid) ,data=d.in.wi,weights=d.in.wi$total_volume)

dt <- data.table(jumbo.resid = jumbo.wi.reg$residuals,cr.resid = cr.wi.reg$residuals,ww = jumbo.wi.reg$weights)
temp.model <- lm(jumbo.resid.jumbo_share ~ cr.resid.cr,data=dt,weights=dt$ww)
dt[,cr.resid.bin := ave(cr.resid.cr,cut2(cr.resid.cr,g=25),na.rm=T) ]

qq <- dt[,j=list(m = weighted.mean(jumbo.resid.jumbo_share,w=ww,na.rm=T),n = sum(ww,na.rm=T)),by=cr.resid.bin]
qq[,cr.resid.cr := cr.resid.bin]
qq[,jumbo.predict := predict(temp.model,qq,interval = 'confidence')[,'fit']]
qq[,jumbo.predict.l := predict(temp.model,qq,interval = 'confidence')[,'lwr']]
qq[,jumbo.predict.u := predict(temp.model,qq,interval = 'confidence')[,'upr']]
ggplot(qq) + geom_point(aes(x=cr.resid.bin,y=m,size=n)) + theme_bw() + theme(legend.position = 'none') + xlab('CR % (residual)') + ylab('jumbo % (residual)') + 
  scale_x_continuous(labels = percent) + scale_y_continuous(labels = percent) + geom_line(aes(x=cr.resid.bin,y=jumbo.predict),linetype = 'dashed') + geom_ribbon(aes(x=cr.resid.bin,ymin=jumbo.predict.l,ymax=jumbo.predict.u),alpha=.25)  
ggsave('figure_2_b.png',height=3,width=5,units = 'in')
